Attribute Information:

Date Time: Each ten minutes. Temperature: Weather Temperature of Tetouan city. Humidity: Weather Humidity of Tetouan city. Wind Speed of Tetouan city. general diffuse flows diffuse flows power consumption of zone 1 of Tetouan city. power consumption of zone 2 of Tetouan city. power consumption of zone 3 of Tetouan city.

rm(list = ls()) #initialization
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.8
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
library(lubridate)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:data.table':
## 
##     first, last
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(zoo)
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
library(ggplot2)
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# read the real data on July 1 for test
df <- read.csv("Tetuan City power consumption.csv")
colnames(df)
## [1] "DateTime"                  "Temperature"              
## [3] "Humidity"                  "Wind.Speed"               
## [5] "general.diffuse.flows"     "diffuse.flows"            
## [7] "Zone.1.Power.Consumption"  "Zone.2..Power.Consumption"
## [9] "Zone.3..Power.Consumption"
# check for missing values
sum(is.na(df$Zone1))
## [1] 0
colnames(df)[7] <- "Zone1"
colnames(df)[8] <- "Zone2"
colnames(df)[9] <- "Zone3"
date <- seq(from=as.POSIXct("2017-01-01 00:00", format = "%Y-%m-%d %H:%M"), length.out = nrow(df), by = "10 min")
df$DateTime <- date #df(df$DateTime, format="%m/%d/%Y %H:%M")
#attr(df$DateTime, "tzone") <- "Africa/Casablanca"

df$year <- year(df$DateTime)
df$month <- month(df$DateTime)
df$week <- week(df$DateTime)
df$day <- day(df$DateTime)
df$hour <- hour(df$DateTime)
df$minute <- minute(df$DateTime)

train test split

df_xts <- xts(df[,7], date) 

daily

train_daily <- df_xts['2017-01-01/2017-12-29']
test_daily <- df_xts['2017-12-30']
plot(train_daily, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

plot(test_daily, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

weekly

#df_weekly <- filter(df,week == 1)
#Z1_weekly <- ts(df_weekly$Zone1, frequency=6*24*52, start=c(2017,1))
#plot(Z1_weekly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")
train_weekly <- df_xts['2017-01-01/2017-12-23']
test_weekly <- df_xts['2017-12-24/2017-12-30']
plot(train_weekly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

plot(test_weekly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

monthly

train_monthly <- df_xts['2017-01-01/2017-11-30']
test_monthly <- df_xts['2017-12-01/2017-12-30']
plot(train_monthly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

plot(test_monthly, xlab = "Time", ylab = "Watt Hours", main = "Zone 1 Power Consumption")

Seasonal Arima

daily

comp_daily <- decompose(ts(train_weekly,frequency = 6*24))
plot(comp_daily)

time_sta <- Sys.time()
fit_daily = stlf(ts(train_daily,frequency = 6*24))
fit_daily$model
## ETS(A,N,N) 
## 
## Call:
##  ets(y = na.interp(x), model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend) 
## 
##   Smoothing parameters:
##     alpha = 0.9989 
## 
##   Initial states:
##     l = 35146.4894 
## 
##   sigma:  278.7779
## 
##     AIC    AICc     BIC 
## 1156524 1156524 1156551
time_end <- Sys.time()
# forecast
pred_daily <- forecast(fit_daily,h=nrow(test_daily))
autoplot(pred_daily)

time1 <- time_end-time_sta
time1
## Time difference of 3.626149 secs
#pred_daily <- forecast(fit_daily, h=nrow(test_daily))
pred_df_daily <- data.table(time=index(test_daily), test_val=test_daily[,1], pred_val=pred_daily$mean)
d<- melt(pred_df_daily, id.vars = "time")
p <- ggplot(d, aes(x=time, y=value, color=variable)) + geom_point(size=1) + geom_line()
p + labs(title="Zone 1 Power Consumption: daily",
        x ="Time", y = "Watt Hours")

weekly

comp_weekly <- decompose(ts(train_weekly,frequency = 6*24*52))
plot(comp_weekly)

time_sta <- Sys.time()
fit_weekly = stlf(ts(train_weekly,frequency = 6*24*52))
fit_weekly$model
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = na.interp(x), model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.6346 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 31972.1007 
##     b = -337.3653 
## 
##   sigma:  394.0297
## 
##     AIC    AICc     BIC 
## 1172130 1172130 1172183
time_end <- Sys.time()
pred_weekly <- forecast(fit_weekly, h=nrow(test_weekly))
autoplot(pred_weekly)

time2 <- time_end-time_sta
time2
## Time difference of 4.178516 secs
#pred_weekly <- forecast(fit_weekly, h=nrow(test_weekly))
pred_df_weekly <- data.table(time=index(test_weekly), test_val=test_weekly[,1], pred_val=pred_weekly$mean)
d<- melt(pred_df_weekly, id.vars = "time")
p <- ggplot(d, aes(x=time, y=value, color=variable)) + geom_point(size=1) + geom_line()
p + labs(title="Zone 1 Power Consumption: weekly",
        x ="Time", y = "Watt Hours")

monthly

train test split

comp_monthly <- decompose(ts(train_monthly,frequency = 6*24*30))
plot(comp_monthly)

fit_monthly = stlf(ts(train_monthly,frequency = 6*24*30))
fit_monthly$model
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = na.interp(x), model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.6058 
##     phi   = 0.8001 
## 
##   Initial states:
##     l = 32492.7206 
##     b = -306.6227 
## 
##   sigma:  408.1339
## 
##     AIC    AICc     BIC 
## 1096795 1096795 1096848
pred_monthly <- forecast(fit_weekly, h=nrow(test_weekly))
autoplot(pred_monthly)

#pred_monthly <- forecast(fit_monthly, h=nrow(test_monthly))
pred_df_monthly <- data.table(time=index(test_monthly), test_val=test_monthly[,1], pred_val=pred_monthly$mean)
## Warning in as.data.table.list(x, keep.rownames = keep.rownames, check.names =
## check.names, : Item 3 has 1008 rows but longest item has 4320; recycled with
## remainder.
d<- melt(pred_df_monthly, id.vars = "time")
p <- ggplot(d, aes(x=time, y=value, color=variable)) + geom_point(size=1) + geom_line()
p + labs(title="Zone 1 Power Consumption: monthly",
        x ="Time", y = "Watt Hours")

annual

fit_annual = auto.arima(ts(df$Zone1,frequency = 6*24*365), seasonal = T)
fit_annual
## Series: ts(df$Zone1, frequency = 6 * 24 * 365) 
## ARIMA(3,1,1) 
## 
## Coefficients:
##          ar1      ar2     ar3      ma1
##       1.0600  -0.2510  0.0675  -0.4518
## s.e.  0.0299   0.0183  0.0064   0.0298
## 
## sigma^2 = 202928:  log likelihood = -394643.6
## AIC=789297.2   AICc=789297.2   BIC=789341.6
pred_annual <- forecast(fit_annual, h=nrow(test_daily))
pred_df_annual <- data.table(time=index(df_xts), test_val=test_daily[,1], pred_val=pred_annual$mean)
d<- melt(pred_df_annual, id.vars = "time")
ggplot(d, aes(x=time, y=value, color=variable)) + 
  geom_point(size=1) + 
  geom_line()

smape(xts(pred_df_daily$pred_val, order.by = pred_df_daily$time), test_daily)
## [1] 0.06732304
smape(xts(pred_df_weekly$pred_val, order.by = pred_df_weekly$time), test_weekly)
## [1] 0.07291239
smape(xts(pred_df_monthly$pred_val, order.by = pred_df_monthly$time), test_monthly)
## [1] 0.06944146
smape(pred_annual$mean, test_daily[,1])
## Warning in ae(actual, predicted): Incompatible methods ("Ops.ts", "Ops.xts") for
## "-"
## Warning in mean(ae(actual, predicted)/(abs(actual) + abs(predicted))):
## Incompatible methods ("Ops.ts", "Ops.xts") for "+"
## [1] 0.2071637
Metrics::rmse(pred_df_daily$pred_val, test_daily)
## Warning in se(actual, predicted): Incompatible methods ("Ops.ts", "Ops.xts") for
## "-"
## [1] 2171.09
Metrics::rmse(pred_df_weekly$pred_val, test_weekly)
## Warning in se(actual, predicted): Incompatible methods ("Ops.ts", "Ops.xts") for
## "-"
## [1] 2559.248
Metrics::rmse(pred_df_monthly$pred_val, test_monthly)
## [1] 2444.629